setwd("~/Research/Mycorrhizae/NPP modeling")
library(tidyr)
library(dplyr)

# this is Angel's finalized dataset and contains forest plantations as well as
# tropical forests for which there is no soil data
#data <- read.csv("finalized.data.csv")
#data <- data[order(data$ID),]
#data <-data[which(data$Climate2 !="Tropical"),]

# this is the final dataset that I will use for this analysis, containing
# boreal, cold and warm temperate forests plus soil data
boreal.data <- read.csv("Forest Productivity Dataset.csv")


# rename and convert to numeric variables
# climatic 
boreal.data$Elevation <- as.numeric(boreal.data$Elevation..m)
boreal.data$Precip <- as.numeric(boreal.data$Precip.mm)
boreal.data$Temp.mean <- as.numeric(boreal.data$Air.temp.mean..C)
boreal.data$Temp.min <- as.numeric(boreal.data$Air.temp.min...C)
boreal.data$Temp.max <- as.numeric(boreal.data$Air.temp.max..C)

# NPP
boreal.data$ANPP <- as.numeric(boreal.data$ANPP..Mg.ha.yr)
boreal.data$BNPP <- as.numeric(boreal.data$BNPP..Mg.ha.yr)
boreal.data$TNPP <- as.numeric(boreal.data$TNPP..Mg.ha.yr)

# soil
boreal.data$FF.mass <- as.numeric(boreal.data$Forest.floor.mass..Mg.ha)
boreal.data$FF.MRT <- as.numeric(boreal.data$FF.MRT..yrs)
boreal.data$FF.N <- as.numeric(boreal.data$FF.N..Mg.ha)
boreal.data$FF.N.MRT <- as.numeric(boreal.data$FF.N.MRT..yrs)
boreal.data$FF.P <- as.numeric(boreal.data$FF.P..Mg.ha)
boreal.data$FF.P.MRT <- as.numeric(boreal.data$FF.P.MRT..yrs)

boreal.data$AlitterN <- as.numeric(boreal.data$ALitterN..Mg.ha.yr)
boreal.data$Alitter <- as.numeric(boreal.data$ALitter..Mg.ha.yr)
boreal.data$Blitter <- as.numeric(boreal.data$BLitter..Mg.ha.yr)
boreal.data$BlitterN <- as.numeric(boreal.data$Blitter.N..Mg.ha.yr)
boreal.data$AlitterP <- as.numeric(boreal.data$Alitter.P..Mg.ha.yr)

boreal.data$SoilN <- as.numeric(boreal.data$Soil.N..Mg.ha)
boreal.data$SoilP <- as.numeric(boreal.data$Soil.P..Mg.ha)
boreal.data$Soil.OM <- as.numeric(boreal.data$Soil.OM..Mg.ha)
boreal.data$Soil.texture <- as.factor(boreal.data$Soil.texture)
boreal.data$Soil.texture5 <- as.factor(boreal.data$Soil.texture5)
boreal.data$Soil.order <- as.factor(boreal.data$Soil.order)
boreal.data$Soil.order5 <- as.factor(boreal.data$Soil.order5)

# only select data for which anpp OR bnpp OR tnpp values are provided
boreal.data <- boreal.data[ which(boreal.data$ANPP != "NA" | boreal.data$BNPP != "NA"),]

#  total number should be 183 entries of boreal, cold temperate and 
# warm temperate entries

# choose either a full or 5 level classification of soil order and texture
droplevels(boreal.data$Soil.order)
#droplevels(boreal.data$Soil.order5)
droplevels(boreal.data$Soil.texture)
#droplevels(boreal.data$Soil.texture5)

dim(boreal.data)

summary(boreal.data$Soil.order)
summary(boreal.data$Soil.order5)
summary(boreal.data$Soil.texture)
summary(boreal.data$Soil.texture5)

# remove entries with some very high TNPP values
plot(boreal.data$TNPP)
boreal.data <- boreal.data[-173,]
boreal.data <- boreal.data[-166,]
plot(boreal.data$TNPP)

dim(boreal.data)

# add phenology as variable
boreal.data$phenology <- as.vector(0)

for (i in 1:181){
  if (boreal.data$Climatic.forest.type.[i] == "BOBLDE" | 
      boreal.data$Climatic.forest.type.[i] == "CTBLDE" | 
      boreal.data$Climatic.forest.type.[i] == "CTNLDE" | 
      boreal.data$Climatic.forest.type.[i] == "BONLDE" | 
      boreal.data$Climatic.forest.type.[i] == "WTBLDE"){
    boreal.data$phenology[i] <- "deciduous"
  }
}

# add phenology as variable
for (i in 1:181){
  if (boreal.data$Climatic.forest.type.[i] == "BONLEV" | 
      boreal.data$Climatic.forest.type.[i] == "CTNLEV" | 
      boreal.data$Climatic.forest.type.[i] == "WTNLEV" | 
      boreal.data$Climatic.forest.type.[i] == "CTBLEV") 
      {
        boreal.data$phenology[i] <- "evergreen"
      }
}

# add climate type as variable
boreal.data$climate.zone <- as.vector(0)
for (i in 1:181){
  if (boreal.data$Climatic.forest.type.[i] == "BONLEV" | 
      boreal.data$Climatic.forest.type.[i] == "BONLDE" | 
      boreal.data$Climatic.forest.type.[i] == "BOBLDE")
  {
    boreal.data$climate.zone[i] <- "boreal"
  }
}

for (i in 1:181){
  if (boreal.data$Climatic.forest.type.[i] == "CTNLEV" | 
      boreal.data$Climatic.forest.type.[i] == "CTBLDE" | 
      boreal.data$Climatic.forest.type.[i] == "CTBLEV" |
      boreal.data$Climatic.forest.type.[i] == "CTNLDE")
  {
    boreal.data$climate.zone[i] <- "cold temperate"
  }
}

for (i in 1:181){
  if (boreal.data$Climatic.forest.type.[i] == "WTBLDE" | 
      boreal.data$Climatic.forest.type.[i] == "WTNLEV" )
  {
    boreal.data$climate.zone[i] <- "warm temperate"
  }
}


# examine correlation plots
M <- boreal.data %>% select (ANPP, BNPP, Elevation, Precip, Temp.mean, Temp.min, Temp.max,
                             FF.mass, FF.MRT, FF.N, FF.N.MRT, FF.P, FF.P.MRT,
                             Alitter, AlitterN, AlitterP, Blitter, BlitterN,
                             SoilN, Soil.OM)
                             SoilP
                             
M1 <- M[complete.cases(M),]
                             
library(corrplot)
B <- cor(M, use = "complete.obs")
corrplot(B)
B.df <- data.frame(B)
#write.csv(B.df,'correlationmatrix.csv')

index <- which(abs(B) > .50 & abs(B) < 1, # your criteria
               arr.ind = T) # the result of the which function is now in rows & columns
cbind.data.frame(stock1 = rownames(B)[index[,1]], # get the row name 
                 stock2 = colnames(B)[index[,2]]) # get the column name

write.csv(M, "covariates.csv")

